Log molariry of creatine is added as an input variable for URX
chemicals and left out for LBX chemicals.
options(survey.lonely.psu = "adjust")
options(survey.adjust.domain.lonely = TRUE)
suppressMessages(library(pROC))
datalist <- vector(mode = "list", length = desired_length)
# Logreg
for(i in seq(1:desired_length)) {
demo_dat %>%
dplyr::select(!starts_with("WT_")) %>%
left_join(chem_vars_for_analysis %>%
filter(chemical==chem_names[i]) %>%
select(-chemical)) -> temp_dat1
demo_dat %>%
dplyr::select(weight_names[i]) -> temp_dat2
colnames(temp_dat2) <- c("WT")
cbind(temp_dat1, temp_dat2) %>% filter(!is.na(WT)) %>%
mutate(nomiss.raw = !is.na(farmwork) & !is.na(DMDCITZN) &
!is.na(indiv_chem_bioactiv) & RIDAGEYR >= min_age & WT > 0,
nomiss.lb.adj = !is.na(farmwork) & !is.na(DMDCITZN) &
!is.na(indiv_chem_bioactiv) & !is.na(BMXBMI) & !is.na(RIDAGEYR) &
!is.na(SDDSRVYR) & !is.na(RIAGENDR) & !is.na(edu) &
!is.na(RIDRETH1) & RIDAGEYR >= min_age & WT > 0,
nomiss.ur.adj = !is.na(farmwork) & !is.na(DMDCITZN) &
!is.na(indiv_chem_bioactiv) & !is.na(BMXBMI) & !is.na(RIDAGEYR) &
!is.na(SDDSRVYR) & !is.na(RIAGENDR) & !is.na(edu) &
!is.na(RIDRETH1) & !is.na(URXUCR_umol) & RIDAGEYR >= min_age &
WT > 0) -> temp_dat
svydesign(strata = temp_dat$SDMVSTRA, id = temp_dat$SDMVPSU,
weights = temp_dat$WT, data = temp_dat, nest=T) -> nhanes.svy
subset(nhanes.svy, nomiss.raw) -> svy.nomiss.raw
temp_dat %>% filter(nomiss.raw) -> no.na.data
sum(no.na.data$indiv_chem_bioactiv)/length(no.na.data$indiv_chem_bioactiv) -> outcomes1
if(grepl('^LB', chem_names[i])){
subset(nhanes.svy, nomiss.lb.adj) -> svy.nomiss.adj
temp_dat %>% filter(nomiss.lb.adj) -> no.na.data2
} else {
subset(nhanes.svy, nomiss.ur.adj) -> svy.nomiss.adj
temp_dat %>% filter(nomiss.ur.adj) -> no.na.data2
}
#no.na.data2 %>% select(indiv_chem_bioactiv) %>% distinct() %>% nrow() -> outcomes2
sum(no.na.data2$indiv_chem_bioactiv)/length(no.na.data2$indiv_chem_bioactiv) -> outcomes2
no.na.data %>% dplyr::select(SDMVSTRA, SEQN) %>%
group_by(SDMVSTRA) %>% summarise(n=as.numeric(n()))->Ntotal
as.data.frame(as.matrix(Ntotal))->Ntotal
round(svytable(~farmwork, svy.nomiss.raw, Ntotal=Ntotal)[2]) -> estNgroup
no.na.data %>% nrow() -> n
no.na.data2 %>% dplyr::select(SDMVSTRA, SEQN) %>%
group_by(SDMVSTRA) %>% summarise(n=as.numeric(n()))->Ntotal2
as.data.frame(as.matrix(Ntotal2))->Ntotal2
round(svytable(~farmwork, svy.nomiss.adj, Ntotal=Ntotal2)[2]) -> estNgroup2
no.na.data2 %>% nrow() -> n2
if (min(outcomes1,outcomes2)<0.015) {
data.frame(chem = c(chem_names[i]),
model = c("Unadjusted", "Adjusted"),
variable = c("FarmWorker", "FarmWorker",
"NonCitizen", "NonCitizen"),
weight = c(weight_names[i]),
estNgroup= c(estNgroup, estNgroup2),
N = c(n , n2),
OR = c(NA_real_, NA_real_),
CI_2.5 = c(NA_real_, NA_real_),
CI_97.5 = c(NA_real_, NA_real_),
t = c(NA_real_, NA_real_),
#maxVIF = c(NA_real_, NA_real_),
ROC = c(NA_real_, NA_real_),
p = c(NA_real_, NA_real_)
) -> data_rows
} else {
#model included
svyglm(indiv_chem_bioactiv ~ farmwork + DMDCITZN, design=svy.nomiss.raw,
family = quasibinomial(link = "logit")) -> fw_raw
#model included
if(grepl('^LB', chem_names[i])){
svyglm(indiv_chem_bioactiv ~ farmwork + DMDCITZN + BMXBMI +
RIDAGEYR + SDDSRVYR + RIAGENDR + edu + RIDRETH1,
design = svy.nomiss.adj, family = quasibinomial(link = "logit")) -> fw_adj
} else {
svyglm(indiv_chem_bioactiv ~ farmwork + DMDCITZN + BMXBMI +
RIDAGEYR + SDDSRVYR + RIAGENDR + edu + RIDRETH1 + log(URXUCR_umol),
design = svy.nomiss.adj, family = quasibinomial(link = "logit")) -> fw_adj
}
# Max VIF
#max(summ(fw_adj, vifs=TRUE)$coeftable[,5], na.rm=TRUE) -> vif_fw_adj
# Discrimination (ROC)
tp.fp_raw <- WeightedROC(fitted(fw_raw),svy.nomiss.raw$variables$indiv_chem_bioactiv,
no.na.data$WT)
WeightedAUC(tp.fp_raw) -> roc_raw
tp.fp_adj <- WeightedROC(fitted(fw_adj),svy.nomiss.adj$variables$indiv_chem_bioactiv,
no.na.data2$WT)
WeightedAUC(tp.fp_adj) -> roc_adj
data.frame(chem = c(chem_names[i]),
model = c("Unadjusted", "Adjusted"),
variable = c("FarmWorker", "FarmWorker",
"NonCitizen", "NonCitizen"),
weight = c(weight_names[i]),
estNgroup= c(estNgroup, estNgroup2),
N = c(n , n2),
OR = c(exp(coefficients(fw_raw)[2]),
exp(coefficients(fw_adj)[2]),
exp(coefficients(fw_raw)[3]),
exp(coefficients(fw_adj)[3])),
CI_2.5 = c(exp(confint(fw_raw, ddf=degf(fw_raw$survey.design)))[2,1],
exp(confint(fw_adj, ddf=degf(fw_adj$survey.design)))[2,1],
exp(confint(fw_raw, ddf=degf(fw_raw$survey.design)))[3,1],
exp(confint(fw_adj, ddf=degf(fw_adj$survey.design)))[3,1]),
CI_97.5 = c(exp(confint(fw_raw, ddf=degf(fw_raw$survey.design)))[2,2],
exp(confint(fw_adj, ddf=degf(fw_adj$survey.design)))[2,2],
exp(confint(fw_raw, ddf=degf(fw_raw$survey.design)))[3,2],
exp(confint(fw_adj, ddf=degf(fw_adj$survey.design)))[3,2]),
t = c(summary(fw_raw, df.resid = degf(fw_raw$survey.design))$coef[2,3],
summary(fw_adj, df.resid = degf(fw_adj$survey.design))$coef[2,3],
summary(fw_raw, df.resid = degf(fw_raw$survey.design))$coef[3,3],
summary(fw_adj, df.resid = degf(fw_adj$survey.design))$coef[3,3]),
#maxVIF = c(NA_real_, vif_fw_adj),
ROC = c(roc_raw,
roc_adj),
p = c(summary(fw_raw)$coefficients[2,4],
summary(fw_adj)$coefficients[2,4],
summary(fw_raw)$coefficients[3,4],
summary(fw_adj)$coefficients[3,4])
) -> data_rows
}
datalist[[i]] <- data_rows
#print(i)
}
do.call(rbind, datalist) -> logreg_results_fw
# Organize into unadjusted and adjusted tables
logreg_results_fw %>%
filter(model == "Unadjusted") -> logreg_results_raw
logreg_results_fw %>%
filter(model == "Adjusted") -> logreg_results_adj
# Format for tables
logreg_results_raw %>%
mutate(p.fdr = p.adjust(p, method="fdr"),
p.fdr.sig = ifelse(p.fdr<0.05,"*","")) %>%
mutate(OR = if_else(OR < 0.0001,
formatC(OR, digits = 4, format = "e"),
formatC(OR, digits = 4, format = "f", drop0trailing = FALSE)),
CI_2.5 = if_else(CI_2.5 < 0.0001,
formatC(CI_2.5, digits = 4, format = "e"),
formatC(CI_2.5, digits = 4, format = "f", drop0trailing = FALSE)),
CI_97.5 = if_else(CI_97.5 < 0.0001,
formatC(CI_97.5, digits = 4, format = "e"),
formatC(CI_97.5, digits = 4, format = "f", drop0trailing = FALSE)),
p = if_else(p < 0.01,
formatC(p, digits = 4, format = "e"),
formatC(p, digits = 4, format = "f", drop0trailing = FALSE)),
p.fdr = if_else(p.fdr < 0.01,
formatC(p.fdr, digits = 4, format = "e"),
formatC(p.fdr, digits = 4, format = "f", drop0trailing = FALSE)),
) %>%
mutate_if(is.numeric, function(x) {
formatC(x, digits = 4, format = "f", drop0trailing = FALSE)
}) %>%
mutate(estNgroup =as.integer(estNgroup),
N =as.integer(N)) -> logreg_table_raw
logreg_results_adj %>%
mutate(p.fdr = p.adjust(p, method="fdr"),
p.fdr.sig = ifelse(p.fdr<0.05,"*","")) %>%
mutate(OR = if_else(OR < 0.0001,
formatC(OR, digits = 4, format = "e"),
formatC(OR, digits = 4, format = "f", drop0trailing = FALSE)),
CI_2.5 = if_else(CI_2.5 < 0.0001,
formatC(CI_2.5, digits = 4, format = "e"),
formatC(CI_2.5, digits = 4, format = "f", drop0trailing = FALSE)),
CI_97.5 = if_else(CI_97.5 < 0.0001,
formatC(CI_97.5, digits = 4, format = "e"),
formatC(CI_97.5, digits = 4, format = "f", drop0trailing = FALSE)),
p = if_else(p < 0.01,
formatC(p, digits = 4, format = "e"),
formatC(p, digits = 4, format = "f", drop0trailing = FALSE)),
p.fdr = if_else(p.fdr < 0.01,
formatC(p.fdr, digits = 4, format = "e"),
formatC(p.fdr, digits = 4, format = "f", drop0trailing = FALSE)),
) %>%
mutate_if(is.numeric, function(x) {
formatC(x, digits = 4, format = "f", drop0trailing = FALSE)
}) %>%
mutate(estNgroup =as.integer(estNgroup),
N =as.integer(N)) -> logreg_table_adj